Now it is time to read in the data and get into a workable format for our problem

# read in the data
df <- read.csv('Data/NEISS/sport_category_final.csv')

# create a contigency table 
orig_table <- table(df$prod1,df$body_part)

# create a datafame from the contigency table (at least one that is intuitive)
tm<-as.data.frame.matrix(orig_table) # tm stands for table matrix

and some further cleanup. This time, we need to ensure that all of the classifications of the original data belong to the same categories of that in the Berger and Trinkaus paper.

head_neck <- tm$face + tm$neck + tm$head
shoulder_arm <- tm$`upper arm` + tm$`lower arm` + tm$shoulder + tm$elbow
hand <- tm$hand + tm$finger + tm$wrist
pelvis <- tm$`pubic region` + tm$hip 
leg <- tm$knee + tm$`lower leg` + tm$`upper leg`
foot <- tm$foot + tm$toe + tm$ankle
trunk <- tm$`upper trunk` + tm$back

Now let’s wrap this all into a final dataframe that we will use throughout the rest of this analysis.

final<-as.data.frame(cbind(head_neck,shoulder_arm,hand,pelvis,leg,foot,trunk))
rownames(final)<-rownames(tm) # index will be the activity name

Grabbing the values off of the Berger and Trinkaus paper is the first task

sample1<-c(8,4,7,1,1,3,3) # total sample
sample2<-c(7,4,7,1,0,3,1) # without djd
sample3<-c(6,4,5,1,1,3,1) # without shandidar 1
sample4<-c(5,4,5,1,0,3,0) # without djd or shandidar 1

Now it is time to run the actual analysis. We are going to create some tables the involve running a chi square test on all of the samples against all of the activities as well as calculating Cramer’s V.

# total sample
nt<-t(apply(final,1,function(x) {
  new<- cbind(sample1,x)
  ch <- chisq.test(new)
  chi<-c(unname(ch$statistic), ch$p.value)
  cram<-CramerV(new)
  cbind(chi,cram)
}))

# sample with out djd
nwd<-t(apply(final,1,function(x) {
  new<- cbind(sample2,x)
  ch <- chisq.test(new)
  chi<-c(unname(ch$statistic), ch$p.value)
  cram<-CramerV(new)
  cbind(chi,cram)
}))

# sample without shandidar
nws<-t(apply(final,1,function(x) {
  new<- cbind(sample3,x)
  ch <- chisq.test(new)
  chi<-c(unname(ch$statistic), ch$p.value)
  cram<-CramerV(new)
  cbind(chi,cram)
}))

# sample without shandidar or djd
nwsd<-t(apply(final,1,function(x) {
  new<- cbind(sample4,x)
  ch <- chisq.test(new)
  chi<-c(unname(ch$statistic), ch$p.value)
  cram<-CramerV(new)
  cbind(chi,cram)
}))

Create a function to clean up some vital information about the Chi Square test

chi2cleanup<-function(table){
  
  # read in one of the chi square tables (nt, nwd, nws or nwsd)
  frame<-as.data.frame(table,row.names = rownames(table))
  frame<-frame[-c(4)] # drop the extra p value column
  names(frame) = c('X2','P-Value','Cramers V')
  fin = na.omit(frame) 
  
  # create two callable objects, 1) the rows that have NAs and 2) the final data frame for some final manipulation
  c<- list(
    rowna=frame[is.nan(frame$X2),],   # $rowna 
    final=fin,                        # $final
    similar=fin[fin$`P-Value` > .05,] # $similar (activities that are similar, P > 0.05)
  )
  return(c)
}

Analysis

# name cleaned up chi square tables
n_tot<-chi2cleanup(nt)  # neanderthal total
n_djd<-chi2cleanup(nwd) # neanderthal w/o djd
n_s<-chi2cleanup(nws)   # neanderthal w/o shan
n_sd<-chi2cleanup(nwsd) # neanderthal w/o shan or djd

Find similar activities per sample

n_tot$similar   # neanderthal total
##                                   X2    P-Value Cramers V
## altar                       3.576823 0.73372229 0.2293476
## banisters                  11.100461 0.08532075 0.4265850
## benches                     8.976722 0.17489146 0.2781826
## bleachers                  10.958421 0.08967001 0.2658938
## diving board                7.396248 0.28575071 0.2616941
## flying discs or boomerangs 11.355433 0.07799320 0.2944192
## golf                       11.749776 0.06778954 0.2181053
## golf carts                  9.972497 0.12581503 0.2179176
## lawn chair                 10.638039 0.10022845 0.3791534
## loading docks              11.923561 0.06369594 0.4014090
## poles                      11.112463 0.08496219 0.3553561
## roller hockey               9.546665 0.14508720 0.4022535
## snow tubing                10.748946 0.09645243 0.3946921
## swimsuit                    7.175379 0.30493252 0.2165592
## water tubing                2.973634 0.81214870 0.1667062
## windsurfing                12.534779 0.05104817 0.2221475
n_djd$similar   # neanderthal w/o djd
##                                   X2    P-Value  Cramers V
## altar                       4.682090 0.58518450 0.27047673
## baseball                    6.979352 0.32276095 0.04624864
## benches                     9.566742 0.14412345 0.29226244
## bleachers                  11.207769 0.08216346 0.27244015
## diving board               10.945581 0.09007295 0.32441635
## flying discs or boomerangs 11.922667 0.06371642 0.30639724
## golf carts                 12.067448 0.06048094 0.24203273
## paddle ball                11.052587 0.08676471 0.36939368
## poles                       8.549072 0.20057074 0.31902142
## roller hockey               8.292168 0.21747021 0.38828687
## swimsuit                    9.449321 0.14984055 0.25182983
## water tubing                5.801308 0.44581190 0.23732547
n_s$similar     # neanderthal w/o shan
##                                       X2    P-Value  Cramers V
## altar                           1.972567 0.92220419 0.17836929
## banisters                      11.173352 0.08316429 0.45072369
## baseball                        6.915363 0.32874544 0.04605026
## benches                         6.139257 0.40777277 0.23624442
## bleachers                       5.866440 0.43831681 0.19842399
## cheerleading                   11.380472 0.07730557 0.17192926
## diving board                    7.126857 0.30927482 0.26433150
## flying discs or boomerangs      7.098503 0.31183373 0.23830238
## golf                            7.484431 0.27835805 0.17622636
## golf carts                      6.657463 0.35368900 0.18065056
## lawn chair                     10.363262 0.11016446 0.39038559
## loading docks                   8.473317 0.20543589 0.35299802
## mountain/all-terrain biking    10.561468 0.10291280 0.21713915
## paddle ball                     9.776645 0.13437860 0.35178829
## patios/flooring                 9.538668 0.14547269 0.23899324
## poles                           6.966236 0.32398093 0.29146887
## roller hockey                   5.897077 0.43481796 0.33356498
## snow tubing                     9.206119 0.16231362 0.38226806
## swimming pools (not specified)  9.871133 0.13018486 0.23288837
## swimsuit                        6.633137 0.35610991 0.21242265
## water tubing                    2.910867 0.81994803 0.16976590
## windsurfing                     9.849892 0.13111741 0.19929201
n_sd$similar    # neanderthal w/o shan or djd
##                                    X2    P-Value  Cramers V
## altar                        4.315178 0.63410647 0.27044152
## baseball                     6.189503 0.40230034 0.04358653
## benches                      8.090566 0.23154305 0.27497775
## bleachers                    7.957986 0.24119759 0.23346681
## cheerleading                11.047118 0.08693106 0.17005634
## diving board                11.526266 0.07341106 0.34121391
## flying discs or boomerangs   9.385163 0.15304725 0.27735819
## golf                         9.848193 0.13119225 0.20341819
## golf carts                   9.484323 0.14811598 0.21722266
## lawn chair                  11.823483 0.06602455 0.42649705
## loading docks               10.444960 0.10712155 0.40086369
## mountain/all-terrain biking 11.766333 0.06738931 0.23074082
## paddle ball                  9.957890 0.12643658 0.36197358
## patios/flooring             10.016769 0.12394763 0.24713935
## poles                        7.326147 0.29173540 0.30452592
## snow tubing                 12.332112 0.05495688 0.45335991
## swimsuit                     9.999291 0.12468188 0.26351380
## water tubing                 6.295139 0.39095341 0.25344844

Lets look at what Rodeo Riders look like in comparison to the Neanderthal samples

n_tot$final['rodeo',]   # neanderthal total
##             X2     P-Value Cramers V
## rodeo 23.62703 0.000611504 0.2198114
n_djd$final['rodeo',]   # neanderthal w/o djd
##             X2     P-Value Cramers V
## rodeo 22.25172 0.001090031 0.2141958
n_s$final['rodeo',]     # neanderthal w/o shan
##             X2    P-Value Cramers V
## rodeo 16.68168 0.01052714 0.1858431
n_sd$final['rodeo',]    # neanderthal w/o shan or djd
##             X2    P-Value Cramers V
## rodeo 16.43343 0.01160729 0.1850306
similarSelector<-function(frame,final_frame,neander_sample){ 
  
  require(reshape)
  require(plyr)
  indices=rownames(frame$similar)               # find rows to set as indices
  new_rows = final_frame[indices,]              # map those to original contigency table
  
  sample =append(c("neander","rodeo"),rownames(new_rows))  # add a new vector to use a "sample" column
  
  rodeo = final_frame['rodeo',] # add rodeo riders
  
  joined_rows = rbind(neander_sample,rodeo,new_rows) # join rows from neanderthal sample to new dataframe
  
  props = prop.table(as.table(as.matrix(joined_rows)),1)
  
  props<-as.data.frame.matrix(props)
  
  joined_cols = cbind(sample,props)       # add the "sample" column
  
  rownames(joined_cols) = NULL                  # remove the row indices 
      
  melted <- melt(joined_cols, id=(c("sample"))) # transpose contigency table
  
  return(melted)                
}
# an example of how similarSelector works
simToNeanderTotal<-similarSelector(n_tot,final,sample1)
simToNeanderWOdjd<-similarSelector(n_djd,final,sample2)
simToNeanderWOShan<-similarSelector(n_s,final,sample3)
simToNeanderWOdjdOrShan<-similarSelector(n_sd,final,sample4)

Find jus the neanderthals in the above dataframes

# extract just the neanderthal row for emphasis in plots
n2<-simToNeanderTotal[simToNeanderTotal$sample=='neander',]
d2<-simToNeanderWOdjd[simToNeanderWOdjd$sample=='neander',]
s2<-simToNeanderWOShan[simToNeanderWOShan$sample=='neander',]
sd<-simToNeanderWOdjdOrShan[simToNeanderWOdjdOrShan$sample=='neander',]

# extract just the rodeo riders for emphasis in plots
n3<-simToNeanderTotal[simToNeanderTotal$sample=='rodeo',]
d3<-simToNeanderWOdjd[simToNeanderWOdjd$sample=='rodeo',]
s3<-simToNeanderWOShan[simToNeanderWOShan$sample=='rodeo',]
sd3<-simToNeanderWOdjdOrShan[simToNeanderWOdjdOrShan$sample=='rodeo',]

Plot those that are similar

plotMaker<-function(total_frame,neander_frame,rodeo_frame){
  ggplot(data=total_frame, # this needs to be one of the dataframes directly above
         aes(x=factor(variable), y=value, 
             group=sample,
             color=sample)) + 
    geom_line() + 
    geom_point() +
    geom_point(data=neander_frame,aes(x=factor(variable), y=value, 
                                      group=sample, size = 4))+
    geom_line(data=neander_frame,aes(x=factor(variable), y=value, 
                          group=sample, size = 2))+ 
    geom_point(data=rodeo_frame,aes(x=factor(variable), y=value, 
                                      group=sample, size = 4))+
    geom_line(data=rodeo_frame,aes(x=factor(variable), y=value, 
                                     group=sample, size = 2))+ 
    scale_x_discrete("Proportion") +
    scale_y_continuous("Body Part")+
    guides(size=FALSE)
}

Plots

The total sample

plotMaker(simToNeanderTotal,n2,n3)

The sample with out djd

plotMaker(simToNeanderWOdjd,d2,d3)

The sample without Shandidar

plotMaker(simToNeanderWOShan,s2,s3)

The sample without djd or Shan

plotMaker(simToNeanderWOdjdOrShan,sd,sd3)